Expand Code
# Load Libraries
start_time_all <- Sys.time()
options(warn = 1) # output warnings as they appear for traceability in stdout/stderr record
knitr::opts_chunk$set(echo = TRUE, warning = FALSE) # warnings will go to console
quiet_library <- function(...) {
suppressPackageStartupMessages(library(...))
}
quiet_library(batchreporter)
quiet_library(Matrix) # dependency of H5weaver
quiet_library(rhdf5) # dependency of H5weaver
quiet_library(H5weaver) # aifi package
quiet_library(HTOparser) # aifi package
quiet_library(ggplot2)
quiet_library(dplyr) # data wrangling
quiet_library(cowplot) # arranging multiple plots
quiet_library(gt) # formatted table output
quiet_library(plotly) # interactive plots
quiet_library(tidyr) # data wrangling
quiet_library(jsonlite) # reading and writing json metadata files
quiet_library(purrr)
quiet_library(future) # multi-threading for batch umap creation
quiet_library(future.apply) # multi-threading for batch umap creation
quiet_library(Seurat)
stm("Starting NGS Batch Report")
stm(paste(c("\t",paste(names(Sys.info()),Sys.info(),sep = ": ")), collapse = "\n\t"))
Argument Parsing
# give input directory rna-specific name
if(is.null(params$in_dir)) {
batch_id <- "X070"
in_dir <- system.file("extdata/X070", package = "batchreporter")
in_key <- system.file("extdata/example_sample_key_X070.csv", package = "batchreporter")
in_config <- system.file("extdata/default_rna_config_v1.csv", package = "batchreporter")
in_batch_meta <- system.file("extdata/batch-metadata.json", package = "batchreporter")
in_method_string <- "scrna;scatac;adt;hto"
out_dir <- tempdir()
} else {
batch_id <- params$batch_id
in_method_string <- params$in_method
in_dir <- params$in_dir
in_key <- params$in_key
in_config <- params$in_config
if(is.null(in_config)){
in_config <- system.file("extdata/default_rna_config_v1.csv", package = "batchreporter")
stm("No config file provided. Using default_rna_config_v1.csv for pbmc data")
}
out_dir <- params$out_dir
in_batch_meta <- params$in_batch_meta
if(is.null(in_batch_meta)){in_batch_meta <- NA}
}
stm(paste0("IN Batch : ", batch_id))
stm(paste0("IN Method : ", in_method_string))
stm(paste0("IN Directory : ", in_dir))
stm(paste0("IN Sample Key : ", in_key))
stm(paste0("IN Config : ", in_config))
stm(paste0("IN Batch Processing Info : ", in_batch_meta))
stm(paste0("OUT Dir : ", out_dir))
print(paste0("IN Batch : ", batch_id))
## [1] "IN Batch : X000"
print(paste0("IN Method : ", in_method_string))
## [1] "IN Method : scrna;scatac;adt"
print(paste0("IN Directory : ", in_dir))
## [1] "IN Directory : ~/Packages/batchreporter/inst/extdata/X070"
print(paste0("IN Sample Key : ", in_key))
## [1] "IN Sample Key : ~/Packages/batchreporter/inst/extdata/example_sample_key_X070_dummy-nonhashed.csv"
print(paste0("IN Config : ", in_config))
## [1] "IN Config : ~/Packages/batchreporter/inst/extdata/default_rna_config_v1.csv"
print(paste0("IN Batch Processing Info : ", in_batch_meta))
## [1] "IN Batch Processing Info : NA"
print(paste0("OUT Dir : ", out_dir))
## [1] "OUT Dir : ~/Projects/X070/test_reports"
Check input files
if(!dir.exists(in_dir)) {
stm(paste("ERROR: Cannot find IN results dir:", in_dir))
stop()
}
if(!file.exists(in_key)) {
stm(paste("ERROR: Cannot find IN sample key:", in_key))
stop()
}
if(!file.exists(in_config)) {
stm(paste("ERROR: Cannot find IN config file:", in_config))
stop()
}
if(!is.na(in_batch_meta) && !file.exists(in_batch_meta)) {
stm(paste("ERROR: Cannot find IN batch meta:", in_batch_meta))
stop()
}
if(!dir.exists(out_dir)) {
stm(paste("Creating output directory:", out_dir))
dir.create(out_dir)
}
out_prefix <- file.path(out_dir, paste0(batch_id, "_"))
Read in the sample key
stm("Reading in sample key")
df_key <- data.table::fread(in_key)
has_controls <- any(grepl("Control", df_key$Type)) # used to control evaluation of batch control-related code chunks
assertthat::assert_that(length(unique(df_key$BatchID)) == 1, msg = "More than 1 batch in input sample key file")
## [1] TRUE
assertthat::assert_that(batch_id == unique(df_key$BatchID), msg = "Batch in input key file does not match input batch value")
## [1] TRUE
Determine which modalities streams were run
defined_modalities <- c("scrna", "scatac", "adt", "hto")
# convert method string to vector
in_method <- strsplit(in_method_string, split = ";")[[1]]
in_method <- tolower(in_method)
# Logic check input methods
if(!all(in_method %in% defined_modalities)){
unknowns <- setdiff(in_method, defined_modalities)
stop(sprintf("One or more input methods are not in defined modalities: '%s'. Defined modalities are: [%s]. Input methods should be passed as a ';'-delimited string, ie 'scrna;scatac;hto'.",
paste(unknowns, collapse = "', '"),
paste(defined_modalities, collapse = ', ')))
}
has_rna <- "scrna" %in% in_method
has_atac <- "scatac" %in% in_method
has_adt <- "adt" %in% in_method
has_hto <- "hto" %in% in_method
Define and check input folder expectations
if(has_rna){
in_rna <- file.path(in_dir, "scrna")
if(!dir.exists(in_rna)){
stop(sprintf("Expected RNA input directory [%s] does not exist.", in_rna))
}
}
if(has_atac){
in_atac <- file.path(in_dir, "scatac")
if(!dir.exists(in_atac)){
stop(sprintf("Expected ATAC input directory [%s] does not exist.", in_atac))
}
}
if(has_adt){
in_adt <- file.path(in_dir, "adt")
if(!dir.exists(in_adt)){
stop(sprintf("Expected ADT input directory [%s] does not exist.", in_adt))
}
}
if(has_hto){
in_hto <- file.path(in_dir, "hto")
if(!dir.exists(in_hto)){
stop(sprintf("Expected HTO input directory [%s] does not exist.", in_hto))
}
}
stm("Constructing Batch Information table")
# Summarize batch information, also declare some global batch variables that are used throughout the report
pools <- unique(df_key$PoolID)
n_pools <- length(pools)
if(n_pools==1 & grepl("P0$", pools)){
n_pools_label <- 0
} else {
n_pools_label <- n_pools
}
wells <- unlist(strsplit(unique(df_key$WellID), split = ";"))
n_wells <- length(wells)
samples <- unique(df_key$SampleID)
n_samples <- length(samples)
controls <- unique(df_key$SampleID[df_key$Type == "Control"])
control_string <- ifelse(has_controls, paste(controls, collapse = ", "), "None")
if (has_controls){
n_study_samples <- setdiff(samples, controls)
} else {
n_study_samples <- samples
}
n_study_samples <- length(n_study_samples)
samples_pool <- tapply(df_key$SampleID, df_key$PoolID, unique)
samples_pool_string <- sapply(samples_pool, function(x){paste(x, collapse = ", ")})
labels <- c("Batch","Libraries","N Samples", "N Pools","N Wells","Batch Control", paste0(pools, " Samples"))
values <- c(batch_id, in_method_string, n_study_samples, n_pools_label, n_wells, control_string, samples_pool_string)
simple_html_table(labels, values, fontsize = 3, col_widths_px = c(175, 850))
| Batch | X000 |
| Libraries | scrna;scatac;adt |
| N Samples | 3 |
| N Pools | 1 |
| N Wells | 3 |
| Batch Control | None |
| X000-P1 Samples | Sample01, Sample02, Sample03 |
if(!is.na(in_batch_meta)){
if(!file.exists(in_batch_meta)) {stop(sprintf("Supplied in_batch_meta file '%s' does not exist. ",in_batch_meta))}
meta_table <- jsonlite::read_json(in_batch_meta)
meta_table <- as.data.frame(meta_table) # collapses any nested names
if(nrow(meta_table) == 1){
simple_html_table(colnames(meta_table), unlist(meta_table[1,]), fontsize = 3, col_widths_px = c(200,250))
} else{
stop(sprintf("in_batch_meta file should have 1:1 key value pairs. Check supplied file '%s' for proper formatting. ",in_batch_meta))
}
} else {
cat("No processing details supplied")
}
No processing details supplied
The following metrics summarize the sequencing and alignment by 10x well prior to un-hashing and hash-based cell filtering.
Expand Code
Check Dependencies
Reading in well info from h5 files
if(!exists("well_info")){
# Merge Well Data
stm("Reading in labeled h5 file well data")
well_list <- lapply(all_h5, read_h5_well_meta)
# Find common columns
cols_keep <- Reduce(intersect, lapply(well_list, colnames))
well_list <- lapply(well_list, function(x){x[,cols_keep]})
well_info <- unique(do.call(rbind, well_list))
setDT(well_info)
well_info[, pool_id := gsub("C\\d+W\\d+", "", well_id)]
remove("well_list")
}
Reading in metadata from h5 files
stm("Reading and merging all rna meta data")
rna_meta_list <- lapply(all_h5, H5weaver::read_h5_cell_meta)
# make sure column names are the same
col_list <- lapply(rna_meta_list, colnames)
all_cols_identical <- length(unique(col_list)) == 1
if(!all_cols_identical){
all_columns <- unique(unlist(lapply(rna_meta_list, colnames)))
common_columns <- Reduce(union, lapply(rna_meta_list, colnames))
if(!all(all_columns %in% common_columns)){
stm(sprintf("Warning: rna h5 files do not contain the same meta data columns. Keeping only the common columns. Removing columns %s.",
paste(setdiff(all_columns, common_columns), sep = ", ")))
}
rna_meta_list <- lapply(rna_meta_list, function(x){x[, common_columns]})
}
# merge metadata
rna_meta <- do.call(rbind, rna_meta_list)
# add metadata variables
rna_meta$fct_mito_umi <- rna_meta$n_mito_umi/rna_meta$n_umis
fct_mito_grp_cutoffs <- c(-Inf, 0.05, 0.10, 0.20, 0.30,Inf)
fct_mito_grp_labels <- c("0-0.05","0.05-0.10","0.10-0.20","0.20-0.30",">0.30")
rna_meta$fct_mito_group <- cut(rna_meta$fct_mito_umi, breaks =fct_mito_grp_cutoffs,
labels = fct_mito_grp_labels)
Read in Counts from H5 Files
stm("Reading and merging all rna count matrices")
rna_count_list <- lapply(all_h5, H5weaver::read_h5_dgCMatrix, target = "matrix",
feature_names = "id")
# make sure all matrices have same number of rows
if(!length(unique(sapply(rna_count_list, nrow)))==1){
stop("RNA count matrixes have different numbers of rows")
}
# make sure rows are in same order
row_order <- rownames(rna_count_list[[1]])
rna_count_list <- lapply(rna_count_list, function(x){x[row_order,]})
# make sure columns are in same order as metadata
order_check <- mapply(function(x, y){(all(x$barcodes==colnames(y)))}, rna_meta_list, rna_count_list)
if(!all(order_check)){
# Reorder matrix columns to be consistent with metadata
rna_count_list <- mapply( function(x, y){x[,y$barcodes]}, rna_count_list, rna_meta_list)
}
# merge
rna_counts <- do.call(cbind, rna_count_list)
featDF <- read_h5_feature_meta(all_h5[1])
Sample cells from each well to generate umaps
n_cells_sample <- 2000
stm(sprintf("Sampling %s cells per well (or all cells if fewer)", n_cells_sample))
set.seed(3)
sample_index_list <- lapply(rna_meta_list, function(x){
sample_size = min(n_cells_sample, nrow(x))
sort(sample(1:nrow(x), size = sample_size, replace = FALSE))
})
n_files <- length(sample_index_list)
rna_meta_list_sampled <- lapply(1:n_files, function(x){
rna_meta_list[[x]][sample_index_list[[x]],]
})
rna_count_list_sampled <- lapply(1:n_files, function(x){
rna_count_list[[x]][,sample_index_list[[x]]]
})
rna_meta_sampled <- do.call(rbind, rna_meta_list_sampled)
rna_meta_sampled$fct_mito_umi <- rna_meta_sampled$n_mito_umi/rna_meta_sampled$n_umis
rna_meta_sampled$fct_mito_group <- cut(rna_meta_sampled$fct_mito_umi, breaks =fct_mito_grp_cutoffs,
labels = fct_mito_grp_labels)
rna_counts_sampled <- do.call(cbind, rna_count_list_sampled)
rm(list=c("rna_count_list", "rna_count_list_sampled","rna_meta_list"))
vnames_rna <- c("estimated_number_of_cells", "fraction_reads_in_cells",
"mean_reads_per_cell", "median_genes_per_cell", "median_umi_counts_per_cell",
"number_of_reads", "q30_bases_in_barcode", "q30_bases_in_rna_read",
"q30_bases_in_sample_index", "q30_bases_in_umi", "reads_mapped_antisense_to_gene",
"reads_mapped_confidently_to_exonic_regions", "reads_mapped_confidently_to_genome",
"reads_mapped_confidently_to_intergenic_regions","reads_mapped_confidently_to_intronic_regions",
"reads_mapped_confidently_to_transcriptome", "reads_mapped_to_genome",
"sequencing_saturation", "total_genes_detected", "valid_barcodes")
vnames_arc <- c("estimated_number_of_cells",
"gex_fraction_of_transcriptomic_reads_in_cells","gex_mean_raw_reads_per_cell","gex_median_genes_per_cell",
"gex_median_umi_counts_per_cell","gex_percent_duplicates","gex_q30_bases_in_barcode",
"gex_q30_bases_in_read_2","gex_q30_bases_in_sample_index_i1","gex_q30_bases_in_sample_index_i2",
"gex_q30_bases_in_umi","gex_reads_mapped_antisense_to_gene","gex_reads_mapped_confidently_to_exonic_regions",
"gex_reads_mapped_confidently_to_genome","gex_reads_mapped_confidently_to_intergenic_regions", "gex_reads_mapped_confidently_to_intronic_regions",
"gex_reads_mapped_confidently_to_transcriptome","gex_reads_mapped_to_genome","gex_reads_with_tso",
"gex_sequenced_read_pairs","gex_total_genes_detected","gex_valid_barcodes", "gex_valid_umis")
if(all(vnames_rna %in% colnames(well_info))){
vnames <- vnames_rna
vlabels <- c("Estimated Number of Cells", "Fraction Reads in Cells",
"Mean Reads per Cell", "Median Genes per Cell", "Median UMI per Cell",
"Number of Reads", "Q30 Bases in Barcode (%)", "Q30 Bases in RNA Read (%)",
"Q30 Bases in Sample Index (%)", "Q30 Bases in UMI (%)", "Reads Mapped Antisense to Gene (%)",
"Reads Mapped Confidently to Exonic Regions (%)", "Reads Mapped Confidently to Genome (%)",
"Reads Mapped Confidently to Intergenic Regions (%)",
"Reads Mapped Confidently to Intronic Regions (%)",
"Reads Mapped Confidently to Transcriptome (%)", "Reads Mapped to Genome (%)",
"Sequencing Saturation (%)", "Total Genes Detected", "Valid Barcodes (%)")
n_vars <- length(vnames)
vartypes <- c(rep("Cells", 5), rep("Sequencing", 5), rep("Mapping", 7),"Sequencing","Cells","Sequencing")
vartypes <- factor(vartypes, levels = c("Cells", "Sequencing", "Mapping"))
digitsRound <- c(0, 1, rep(0, 4), rep(1, 12), 0, 1)
rna_data_type <- "rna"
} else if (all(vnames_arc %in% colnames(well_info))){
vnames <- vnames_arc
vlabels <- c("Estimated Number of Cells","Fraction Transcriptomic Reads in Cells",
"Mean Raw Reads per Cell", "Median Genes per Cell", "Median UMI per Cell", "Percent Duplicates",
"Q30 Bases in Barcode", "Q30 Bases in RNA Read 2", "Q30 Bases in Sample Index i1",
"Q30 Bases in Sample Index i2", "Q30 Bases in UMI", "Reads Mapped Antisense to Gene",
"Reads Mapped Confidently to Exonic Regions", "Reads Mapped Confidently to Genome",
"Reads Mapped Confidently to Intergenic Regions",
"Reads Mapped Confidently to Intronic Regions",
"Reads Mapped Confidently to Transcriptome", "Reads Mapped to Genome", "Reads with TSO",
"Sequenced Read Pairs", "Total Genes Detected", "Valid Barcodes", "Valid UMIs")
n_vars <- length(vnames)
vartypes <- c(rep("Cells", 5), rep("Sequencing", 6), rep("Mapping", 7),"Sequencing","Sequencing","Cells","Sequencing","Sequencing")
digitsRound <- c(0, 3, 0, 0, 0, rep(3, 14), 0, 0, 3, 3)
rna_data_type <- "arc"
} else {
vnames <- setdiff( colnames(well_info), c("well_id","pool_id"))
vlabels <- gsub("_"," ", vnames)
vartypes <- rep(NA, length(vnames))
getDigits <- function(vNumeric){
vChar <- as.character(vNumeric)
is_decimal <- any(grepl("[.]", vChar))
if(is_decimal){
decimal_digits <- nchar(vChar) - (nchar(gsub("[.].*","", vChar))+1)
n_digits <- max(decimal_digits)
} else {
n_digits <- 0
}
n_digits
}
digitsRound <- sapply(subset(well_info, select = vnames), getDigits)
rna_data_type <- "unknown"
}
df_vars <- data.frame(Category = vartypes,
Variable_name = vlabels,
Variable = vnames,
Round = digitsRound)
Summary by pool or by entire non-hashed batch
stm("Creating scrna well cellranger summary table")
if(rna_data_type == "rna"){
pool_info <- well_info %>%
dplyr::group_by(pool_id) %>%
dplyr::summarize(total_cells = formatC(sum(estimated_number_of_cells), big.mark = ",", digits = 0, format = "f"),
total_reads = formatC(sum(number_of_reads), big.mark = ",", digits = 0, format = "f"), .groups = "drop")
} else if (rna_data_type == "arc"){
pool_info <- well_info %>%
dplyr::group_by(pool_id) %>%
dplyr::summarize(total_cells = formatC(sum(estimated_number_of_cells), big.mark = ",", digits = 0, format = "f"),
total_sequenced_read_pairs = formatC(sum(gex_sequenced_read_pairs), big.mark = ",", digits = 0, format = "f"), .groups = "drop")
} else {
print("Warning: No total reads column detected in well metadata")
pool_info <- well_info %>%
dplyr::group_by(pool_id) %>%
dplyr::summarize(total_cells = formatC(sum(estimated_number_of_cells), big.mark = ",", digits = 0, format = "f"),
total_reads = NA, .groups = "drop")
}
names(pool_info) <- stringr::str_to_title(gsub("_", " ", names(pool_info)))
pool_info %>%
gt::gt() %>%
gt::cols_align(align = "right", columns = 2:3)
| Pool Id | Total Cells | Total Sequenced Read Pairs |
|---|---|---|
| X070-P1 | 53,132 | 2,081,986,539 |
stm("Creating detailied scrna well cellranger table")
unique_pools <- sort(unique(well_info$pool_id))
well_summary_table <- well_info %>%
gather(key = Variable, value = Value, all_of(vnames)) %>% # all variables long
full_join(df_vars, by = "Variable") %>%
group_by(pool_id, Category, Variable, Variable_name) %>%
summarize(Median = formatC(median(Value, na.rm=T), big.mark = ",", digits = unique(Round), format = "f"),
Range = get_range(Value, digits = unique(Round), verbose = F),
`CV%` = round(sd(Value)/mean(Value)*100, 1),
.groups = "drop") %>%
arrange(Category, Variable_name) %>%
tidyr::pivot_wider(id_cols = c("Category", "Variable", "Variable_name"),
names_from = pool_id,
values_from = c("Median", "Range", "CV%"),
names_glue = "{pool_id}__{.value}",
names_sort = TRUE) %>%
mutate(Plot = sprintf("[Plot](#%s)", Variable)) %>%
select(Category, Variable_name, contains(unique_pools), Plot) # reorder cols by pools first then stats, keep only the clean var name
gt_table <- well_summary_table %>%
gt::gt() %>%
gt::fmt_markdown(columns = "Plot") %>% # convert the plot column text to markdown to activate links. these links generated with plots below.
gt::cols_width(vars(Category) ~ px(100),
vars(Variable_name) ~ px(150),
ends_with("Median") ~ px(100),
ends_with("Range") ~ px(100),
ends_with("CV%") ~ px(50),
vars(Plot) ~ px(60)) %>%
gt::tab_options(table.font.size = 11, column_labels.font.size = 12) %>%
gt::tab_spanner_delim(delim = "__") %>%
gt::cols_align(align = "right",
columns = c(ends_with("Median"), ends_with("Range"))) %>%
gt::cols_align(align = "center",
columns = vars(Plot)) %>%
gt::cols_label(Variable_name = "Variable")
gt_table
| Category | Variable | X070-P1 | Plot | ||
|---|---|---|---|---|---|
| Median | Range | CV% | |||
| Cells | Estimated Number of Cells | 17,743 | 17,190-18,199 | 2.9 | |
| Cells | Fraction Transcriptomic Reads in Cells | 0.885 | 0.880-0.886 | 0.4 | |
| Cells | Mean Raw Reads per Cell | 39,754 | 35,614-42,379 | 8.7 | |
| Cells | Median Genes per Cell | 1,992 | 1,962-2,050 | 2.2 | |
| Cells | Median UMI per Cell | 4,130 | 4,048-4,297 | 3.1 | |
| Cells | Total Genes Detected | 30,628 | 30,599-30,756 | 0.3 | |
| Mapping | Reads Mapped Antisense to Gene | 0.166 | 0.165-0.171 | 1.7 | |
| Mapping | Reads Mapped Confidently to Exonic Regions | 0.392 | 0.384-0.393 | 1.2 | |
| Mapping | Reads Mapped Confidently to Genome | 0.905 | 0.904-0.905 | 0.1 | |
| Mapping | Reads Mapped Confidently to Intergenic Regions | 0.071 | 0.071-0.072 | 0.3 | |
| Mapping | Reads Mapped Confidently to Intronic Regions | 0.442 | 0.441-0.449 | 1.0 | |
| Mapping | Reads Mapped Confidently to Transcriptome | 0.659 | 0.655-0.661 | 0.5 | |
| Mapping | Reads Mapped to Genome | 0.968 | 0.968-0.970 | 0.1 | |
| Sequencing | Percent Duplicates | 0.775 | 0.758-0.781 | 1.5 | |
| Sequencing | Q30 Bases in Barcode | 0.958 | 0.956-0.959 | 0.2 | |
| Sequencing | Q30 Bases in RNA Read 2 | 0.927 | 0.922-0.930 | 0.4 | |
| Sequencing | Q30 Bases in Sample Index i1 | 0.974 | 0.940-0.974 | 2.0 | |
| Sequencing | Q30 Bases in Sample Index i2 | 0.955 | 0.942-0.958 | 0.9 | |
| Sequencing | Q30 Bases in UMI | 0.959 | 0.957-0.959 | 0.1 | |
| Sequencing | Reads with TSO | 0.119 | 0.116-0.130 | 6.1 | |
| Sequencing | Sequenced Read Pairs | 705,357,243 | 648,135,670-728,493,626 | 6.0 | |
| Sequencing | Valid Barcodes | 0.940 | 0.939-0.941 | 0.1 | |
| Sequencing | Valid UMIs | 0.999 | 0.999-0.999 | 0.0 | |
Expand table of statistics per well
qc_table(well_info)
stm("Generating sequencing and alignment QC plots")
verpal <- hcl.colors(n = n_vars, palette = "viridis")
# Plots
for (i in seq_along(vnames)){
df <- data.table::copy(well_info)
spec <- vnames[i]
slabel <- vlabels[i]
df <- as.data.frame(df)
df$spec_col <- df[,spec]
med_val <- median(df$spec_col)
cv <- round(sd(df$spec_col)/mean(df$spec_col)*100, 2)
n <- sum(!is.na(df$spec_col))
g <- ggplot(df, aes(well_id, spec_col)) +
geom_bar(stat = "identity", fill = verpal[i]) +
geom_hline(yintercept = med_val, linetype = "dashed", color = "red")+
scale_y_continuous(sec.axis = dup_axis(breaks = med_val, labels = med_val, name = ""))+
xlab("Well") +
ylab(slabel) +
facet_wrap(~pool_id, ncol = n_pools, scales = "free_x", drop = TRUE) +
ggtitle(slabel,
subtitle = sprintf("Median=%s CV=%.1f%% N=%s", med_val, cv, n)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
# Plot-specific hyperlink definition
cat(sprintf('\n<a id="%s"></a>', spec), labels = "", sep = "\n")
# Output plot
suppressWarnings(print(g))
# Link back to top of section
cat(" \n[Return to Contents](#rna_seq_well_top)", labels = "", sep = "\n")
}
# Reads per hto cat
stm("Generating read count violin plots")
# Reads per hto plot
g_read <- qc_violin_plot(rna_meta,
category_x = "well_id",
name_x = "Well",
column_y = "n_reads",
name_y = "N Reads per Cell",
log_y = TRUE,
fill = "dodgerblue") +
ggtitle("Reads per Well")
temp_figwidth = max(5, 0.5 + n_wells*0.4)
temp_figheight = 4
batchreporter::make_subchunk(g_read, subchunk_name = "well_read_violin_subchunk",
chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth,
warning = FALSE),
quiet_knit = TRUE)
(function () { g})()
# Reads per hto cat
stm("Generating umi count violin plots")
# UMI per hto plot
g_umi <- qc_violin_plot(rna_meta,
category_x = "well_id",
name_x = "Well",
column_y = "n_umis",
name_y = "N UMIs per Cell",
log_y = TRUE,
fill = "purple") +
ggtitle("UMIs per Well")
temp_figwidth = max(5, 0.5 + n_wells*0.4)
temp_figheight = 4
batchreporter::make_subchunk(g_umi, subchunk_name = "well_umi_violin_subchunk",
chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth,
warning = FALSE),
quiet_knit = TRUE)
(function () { g})()
# Reads per hto cat
stm("Generating gene count violin plots")
# Reads per hto plot
g_genes <- qc_violin_plot(rna_meta,
category_x = "well_id",
name_x = "Well",
column_y = "n_genes",
name_y = "N Genes per Cell",
log_y = TRUE,
fill = "orangered") +
ggtitle("Genes per Well")
temp_figwidth = max(5, 0.5 + n_wells*0.4)
temp_figheight = 4
batchreporter::make_subchunk(g_genes, subchunk_name = "well_gene_violin_subchunk",
chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth,
warning = FALSE),
quiet_knit = TRUE)
(function () { g})()
g_bar <- ggplot(rna_meta, aes(well_id, fill = factor(fct_mito_group,
levels = rev(fct_mito_grp_labels)))) +
geom_bar() +
labs(fill = "Fraction Mitochondrial UMIs") +
ylab("N Cells") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
g_pl <- plotly::ggplotly(g_bar) %>%
layout(hovermode = 'compare')
tempwidth <- n_samples*0.4 + 2
make_subchunk(g_pl, subchunk_name = "fraction_mito_subchunk", quiet_knit = T,
chunk_opt_list = list(fig.width = tempwidth, fig.height = 6, echo = FALSE))
stm("Generating mt umi vs umi count scatter plots")
# Reads per hto plot
g_mito_umi <- ggplot(rna_meta, aes(n_umis, fct_mito_umi)) +
geom_point(color = "purple", alpha = 0.2) +
xlab("UMI per Cell (log10 scale)") +
ylab("Fraction Mitochondrial UMI") +
scale_x_log10() +
ggtitle("Fraction Mitochondrial UMI vs UMI per Cell")
g_mito_umi
Expand Per-Well Plot
stm("Generating mt umi vs umi count scatter plots by well")
n_wells_plot <- length(unique(rna_meta$well_id))
ncols_plot <- 6
nrows_plot <- ceiling(n_wells_plot/ncols_plot)
# Reads per hto plot
g_mito_umi_well <- g_mito_umi +
facet_wrap(~well_id, ncol = ncols_plot)
temp_figwidth <- 2*min(n_wells_plot, ncols_plot)+0.3
temp_figheight <- 2*nrows_plot + 0.3
make_subchunk(g_mito_umi_well, subchunk_name = "fraction_mito_umis_well_subchunk", quiet_knit = T,
chunk_opt_list = list(fig.width = temp_figwidth, fig.height = temp_figheight, echo = FALSE))
stm("Generating mt umi vs gene count scatter plots")
# Reads per hto plot
g_mito_gene <- ggplot(rna_meta, aes(n_genes, fct_mito_umi)) +
geom_point(color = "orangered", alpha = 0.2) +
xlab("Genes per Cell (log10 scale)") +
ylab("Fraction Mitochondrial UMI") +
scale_x_log10() +
ggtitle("Fraction Mitochondrial UMI vs UMI per Cell")
g_mito_gene
Expand Per-Well Plot
stm("Generating mt umi vs gene count scatter plots by well")
n_wells_plot <- length(unique(rna_meta$well_id))
ncols_plot <- 6
nrows_plot <- ceiling(n_wells_plot/ncols_plot)
# Reads per hto plot
g_mito_gene_well <- g_mito_gene +
facet_wrap(~well_id, ncol = ncols_plot)
temp_figwidth <- 2*min(n_wells_plot, ncols_plot)+0.3
temp_figheight <- 2*nrows_plot + 0.3
make_subchunk(g_mito_gene_well, subchunk_name = "fraction_mito_genes_well_subchunk", quiet_knit = T,
chunk_opt_list = list(fig.width = temp_figwidth, fig.height = temp_figheight, echo = FALSE))
Expand code
# Create Seurat object
stm("Creating Seurat object from merged data for scRNA UMAP")
merged_so <- Seurat::CreateSeuratObject(counts = rna_counts_sampled)
# Normalize data
stm("Normalizing data for scRNA UMAP")
merged_so <- Seurat::NormalizeData(object = merged_so,
normalization.method = "LogNormalize",
scale.factor = 10000,
margin = 1)
normCounts <- merged_so[["RNA"]]@data
pc_dims <- min(ncol(merged_so) - 1, 50)
# suppressWarnings(future::plan("multiprocess", workers = avail_workers))
stm("Finding Variable Features for scRNA UMAP")
merged_so <- FindVariableFeatures(object = merged_so)
stm("Scaling Data for scRNA UMAP")
merged_so <- ScaleData(object = merged_so, verbose = FALSE)
stm("Running PCA for scRNA UMAP")
merged_so <- RunPCA(object = merged_so, npcs = pc_dims, verbose = FALSE)
# suppressWarnings(future::plan("multiprocess", workers = avail_workers))
stm("Determining dimensionality via jackstraw method for scRNA UMAP")
labels_order <- rna_meta_sampled$well_id[match(colnames(merged_so),rna_meta_sampled$barcodes)]
names(labels_order) <- colnames(merged_so)
jackstraw_cells <- sample_cells(labels_order, 500, seed = 3030)
jackstraw_so <- merged_so[, jackstraw_cells]
jackstraw_so <- JackStraw(object = jackstraw_so,
dims = pc_dims,
num.replicate = 50,
verbose = FALSE)
jackstraw_so <- ScoreJackStraw(object = jackstraw_so,
dims = 1:pc_dims)
pc_pvals <- jackstraw_so@reductions$pca@jackstraw$overall.p.values[,2]
good_pcs <- pc_pvals < 0.05
nPC <- sum(good_pcs)
pc_var <- Stdev(merged_so, reduction = "pca")^2
total_var <- merged_so@reductions$pca@misc$total.variance
var_selected_pc <- sum(pc_var[good_pcs])/total_var
cumvar_string <- sprintf(fmt = "%.1f", var_selected_pc*100)
stm(sprintf("Selected %s significant pcs via JackStraw, %s%% explained variation for scRNA UMAP", nPC, cumvar_string))
pc_embeddings <- merged_so@reductions$pca@cell.embeddings
# stm(sprintf("Using %s principal components for umap",nPC))
# Run UMAP
# suppressWarnings(future::plan("multiprocess", workers = avail_workers))
stm(sprintf("Running UMAP on selected coordinates for scRNA UMAP"))
merged_so <- Seurat::RunUMAP(merged_so,
dims = c(1:50)[good_pcs],
umap.method = "uwot",
seed.use = 3,
verbose = FALSE)
# Get UMAP coordinates
umapDF <- merged_so[["umap"]]@cell.embeddings %>%
as.data.frame() %>%
dplyr::rename(UMAP_1_merged = UMAP_1, UMAP_2_merged = UMAP_2) %>%
tibble::rownames_to_column(var = "barcodes")
umapDF <- merge(umapDF, rna_meta_sampled, by = "barcodes")
rownames(umapDF) <- umapDF$barcodes
umapDF <- umapDF[rownames(merged_so@"meta.data"), , drop = F]
normCounts <- merged_so[["RNA"]]@data
if(!all(rownames(umapDF) == colnames(normCounts))){
stop("Merged UMAP dataframe does not match columns in normalized counts")
}
Batch-level UMAP of 2000 randomly sampled cells per well using 34 principal components (21.3% explained variance) selected by jackstraw.
stm("Plotting Batch scRNA UMAP by Data Quality Metrics")
# Gene scaling
max_genes <- max(8000, rna_meta$n_genes)
max_umi <- max(60000, rna_meta$n_umis)
point_size <- 0.2
# Cell Types
g_base <- ggplot(umapDF, aes(UMAP_1_merged, UMAP_2_merged))
# fraction mitochondrial umi
stm("Plotting Fraction Mito UMAP")
g1 <- plot_umap_report(df = umapDF,
x_col="UMAP_1_merged",
x_lab = "UMAP 1",
y_col = "UMAP_2_merged",
y_lab = "UMAP 2",
title = "Fraction Mitochondrial UMIs",
point_size = point_size,
color_col = "fct_mito_umi",
scale_color_fun = scale_color_fct_mito)
# N Genes
stm("Plotting N Genes UMAP")
g2 <- plot_umap_report(df = umapDF,
x_col="UMAP_1_merged",
x_lab = "UMAP 1",
y_col = "UMAP_2_merged",
y_lab = "UMAP 2",
title = "N Genes",
point_size = point_size,
color_col = "n_genes",
scale_color_fun = scale_color_genes(max_genes)
)
# N UMIs
stm("Plotting N UMIs UMAP")
g3 <- plot_umap_report(df = umapDF,
x_col="UMAP_1_merged",
x_lab = "UMAP 1",
y_col = "UMAP_2_merged",
y_lab = "UMAP 2",
title = "N UMIs",
point_size = point_size,
color_col = "n_umis",
scale_color_fun = scale_color_umis(max_umi)
)
# Wells
stm("Plotting Well UMAP")
cols_well <- H5weaver::varibow(n_colors = length(unique(umapDF$well_id)))
scale_color_well <- function(...){
scale_color_manual(values = cols_well, ...)
}
g4 <- plot_umap_report(df = umapDF,
x_col="UMAP_1_merged",
x_lab = "UMAP 1",
y_col = "UMAP_2_merged",
y_lab = "UMAP 2",
title = "Well ID",
point_size = point_size,
color_col = "well_id",
scale_color_fun = scale_color_well
) +
guides(colour = guide_legend(override.aes = list(size = 3)))
aligned_plots <- cowplot::align_plots(g1, g2, g3, g4, align = "hv", axis = "tblr") # uniform plot sizing
cowplot::plot_grid(aligned_plots[[1]],
aligned_plots[[2]],
aligned_plots[[3]],
aligned_plots[[4]],
ncol = 2)
scRNA seq report well module v.1.0.0, Lauren Okada
Expand output
quiet_library <- function(...) {
suppressPackageStartupMessages(library(...))
}
quiet_library(ggplot2)
quiet_library(Seurat)
quiet_library(viridis)
quiet_library(dplyr)
quiet_library(plyr)
quiet_library(data.table)
stm("Identifying files for ADT analysis")
# Input directory in_adt should be defined in the parent Rmarkdown report.
if(is.null(in_adt)) {
warning("No input directory for adt batch report, defaulting to test data")
in_adt <- system.file("extdata/X070/adt", package = "batchreporter")
}
adt_count_files <- list.files(in_adt,
pattern = "Tag_Counts.csv")
well_names <- gsub('.{15}$', '', adt_count_files)
adt_count_filepaths <- list.files(in_adt, pattern = "Tag_Counts.csv", full.names = T)
adt_pos_count_filepaths <- list.files(in_adt, pattern = "positive_tag_counts.csv", full.names = T)
adt_pos_count_files <- basename(adt_pos_count_filepaths)
cat(sprintf("IN ADT Tag Count files: \n\t%s", paste(adt_count_files, collapse = "\n\t")), sep = "\n")
## IN ADT Tag Count files:
## X070-EP1C1W1_Tag_Counts.csv
## X070-EP1C1W2_Tag_Counts.csv
## X070-EP1C1W3_Tag_Counts.csv
stm(sprintf("IN ADT Tag Count files: \n\t%s", paste(adt_count_files, collapse = "\n\t")))
cat(sprintf("IN POSITIVE ADT Tag Count files: \n\t%s", paste(adt_pos_count_files, collapse = "\n\t")), sep = "\n")
## IN POSITIVE ADT Tag Count files:
## X070-EP1C1W1_adt_positive_tag_counts.csv
## X070-EP1C1W2_adt_positive_tag_counts.csv
## X070-EP1C1W3_adt_positive_tag_counts.csv
stm(sprintf("IN POSITIVE ADT Tag Count files: \n\t%s", paste(adt_pos_count_files, collapse = "\n\t")))
Expand output
# all_merge <- Reduce(merge,seurat_list)
merge_seurat <- function(so_list){
if (length(so_list) > 2){
temp <- merge(x = so_list[[1]], y = so_list[[2]])
i = 3
while (i < length(so_list) + 1){
temp <- merge(x = temp, y = so_list[[i]])
i = i + 1
}
return(temp)
}
else{
temp <- merge(x = so_list[[1]], y = so_list[[2]])
}
return(temp)
}
all_merge <- merge_seurat(seurat_list)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 53132
## Number of edges: 1375642
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9449
## Number of communities: 45
## Elapsed time: 6 seconds
DimPlot(adt_positives, group.by = 'well')
n_features <- length(rownames(adt_positives))
height = ceiling((n_features / 3) * 2)
i = 1
colsums_list <- list()
while (i < length(count_list) + 1){
temp_adt_so <- seurat_list[[i]]
temp_adt_so <- SetIdent(temp_adt_so, value = 'orig.ident')
temp_adt_positives <- subset(temp_adt_so, idents = 'Cells')
temp_adt_pos_mtx <- t(data.frame(temp_adt_positives@assays$RNA@counts))
colsums_df <- data.frame(colSums(temp_adt_pos_mtx))
colnames(colsums_df) <- 'Count'
colsums_df$ADT <- rownames(colsums_df)
colsums_df$Well <- rep(well_names[[i]], length(rownames(colsums_df)))
colsums_list[[i]] <- colsums_df
i = i + 1
}
combined_colsums <- bind_rows(colsums_list, .id = "column_label")
ggplot(combined_colsums, aes(x = reorder(ADT, -Count), y = Count, color = Well)) + geom_point(size = 5) +
geom_hline(yintercept=median(combined_colsums$Count), linetype="dashed", color = "blue") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + scale_y_log10()
ggplot(combined_colsums, aes(x = reorder(ADT, -Count), y = Count, color = Well)) + geom_point(size = 5) +
geom_hline(yintercept=median(combined_colsums$Count), linetype="dashed", color = "blue") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
ADT report module v.1.0.0, Zach Thomson
start_time_atac <- Sys.time()
quiet_library <- function(...) {
suppressPackageStartupMessages(library(...))
}
quiet_library(data.table)
quiet_library(H5weaver)
quiet_library(ggplot2)
quiet_library(cowplot)
quiet_library(jsonlite)
quiet_library(purrr)
options(stringsAsFactors = FALSE)
Declaring start
stm("Starting scATAC Batch Report module")
if(is.null(in_atac)) {
in_atac <- system.file("testdata/batch_qc", package = "ATAComb")
batch_id <- "X055"
out_dir <- tempdir()
}
stm(paste0("IN results dir : ", in_atac))
stm(paste0("IN BatchID : ", batch_id))
stm(paste0("OUT Directory : ", out_dir))
print(c(
paste0("IN results dir : ", in_atac),
paste0("IN BatchID : ", batch_id),
paste0("OUT H5 directory : ", out_dir)
))
## [1] "IN results dir : ~/Packages/batchreporter/inst/extdata/X070/scatac"
## [2] "IN BatchID : X000"
## [3] "OUT H5 directory : ~/Projects/X070/test_reports"
if(!dir.exists(in_atac)) {
stm(paste("ERROR: Cannot find IN results dir:", in_atac))
stop()
}
if(!dir.exists(out_dir)) {
stm(paste("Creating output directory:", out_dir))
dir.create(out_dir)
}
out_prefix <- file.path(out_dir, paste0(batch_id, "_"))
Unfiltered metadata
meta_files <- list.files(in_atac,
pattern = "_all_metadata.csv.gz$",
full.names = TRUE)
if(length(meta_files) == 0) {
stop("Can't find unfiltered metadata files. Check input directory for *_all_metadata.csv.gz files.")
}
stm("IN Full Metadata Files:")
for(meta_file in meta_files) {
stm(meta_file)
print(meta_file)
}
## [1] "/Users/lauren.okada/Packages/batchreporter/inst/extdata/X070/scatac/X070_X070-MP1C1W1_all_metadata.csv.gz"
## [1] "/Users/lauren.okada/Packages/batchreporter/inst/extdata/X070/scatac/X070_X070-MP1C1W2_all_metadata.csv.gz"
## [1] "/Users/lauren.okada/Packages/batchreporter/inst/extdata/X070/scatac/X070_X070-MP1C1W3_all_metadata.csv.gz"
meta_list <- map(meta_files, fread)
sample_names <- sub(".+/","",sub("_all_metadata.csv.gz","",meta_files))
names(meta_list) <- sample_names
Filtered metadata
filt_meta_files <- list.files(in_atac,
pattern = "_filtered_metadata.csv.gz$",
full.names = TRUE)
if(length(filt_meta_files) < length(meta_files)) {
stop("Can't find filtered metadata files. Check input directory for *_filtered_metadata.csv.gz files.")
} else if(length(filt_meta_files) > length(meta_files)) {
stop("Can't find all metadata files. Check input directory for *_all_metadata.csv.gz files.")
}
stm("IN Filtered Metadata Files:")
for(filt_meta_file in filt_meta_files) {
stm(filt_meta_file)
print(filt_meta_file)
}
## [1] "/Users/lauren.okada/Packages/batchreporter/inst/extdata/X070/scatac/X070_X070-MP1C1W1_filtered_metadata.csv.gz"
## [1] "/Users/lauren.okada/Packages/batchreporter/inst/extdata/X070/scatac/X070_X070-MP1C1W2_filtered_metadata.csv.gz"
## [1] "/Users/lauren.okada/Packages/batchreporter/inst/extdata/X070/scatac/X070_X070-MP1C1W3_filtered_metadata.csv.gz"
filt_meta_list <- map(filt_meta_files, fread)
names(filt_meta_list) <- sub(".+/","",sub("_filtered_metadata.csv.gz","",filt_meta_files))
filt_meta_list <- filt_meta_list[sample_names]
Saturation projections
sat_files <- list.files(in_atac,
pattern = "_saturation_projection.csv.gz$",
full.names = TRUE)
if(length(sat_files) < length(meta_files)) {
stop("Can't find all saturation files. Check input directory for *_saturation_projection.csv.gz files.")
} else if(length(sat_files) > length(meta_files)) {
stop("Can't find all metadata files. Check input directory for *_all_metadata.csv.gz files.")
}
stm("IN Saturation Projection Files:")
for(sat_file in sat_files) {
stm(sat_file)
print(sat_file)
}
## [1] "/Users/lauren.okada/Packages/batchreporter/inst/extdata/X070/scatac/X070_X070-MP1C1W1_saturation_projection.csv.gz"
## [1] "/Users/lauren.okada/Packages/batchreporter/inst/extdata/X070/scatac/X070_X070-MP1C1W2_saturation_projection.csv.gz"
## [1] "/Users/lauren.okada/Packages/batchreporter/inst/extdata/X070/scatac/X070_X070-MP1C1W3_saturation_projection.csv.gz"
names(sat_files) <- sub(".+/","",sub("_saturation_projection.csv.gz","",sat_files))
sat_files <- sat_files[sample_names]
sat_list <- map(sat_files, fread)
Fragment widths
width_files <- list.files(in_atac,
pattern = "_fragment_width_summary.csv.gz",
full.names = TRUE)
if(length(width_files) < length(meta_files)) {
stop("Can't find all fragment width files. Check input directory for *_fragment_width_summary.csv.gz files.")
} else if(length(sat_files) > length(meta_files)) {
stop("Can't find all metadata files. Check input directory for *_all_metadata.csv.gz files.")
}
stm("IN Fragment Width Summary Files:")
for(width_file in width_files) {
stm(width_file)
print(width_file)
}
## [1] "/Users/lauren.okada/Packages/batchreporter/inst/extdata/X070/scatac/X070_X070-MP1C1W1_fragment_width_summary.csv.gz"
## [1] "/Users/lauren.okada/Packages/batchreporter/inst/extdata/X070/scatac/X070_X070-MP1C1W2_fragment_width_summary.csv.gz"
## [1] "/Users/lauren.okada/Packages/batchreporter/inst/extdata/X070/scatac/X070_X070-MP1C1W3_fragment_width_summary.csv.gz"
names(width_files) <- sub(".+/","",sub("_fragment_width_summary.csv.gz","",width_files))
width_files <- width_files[sample_names]
width_list <- map(width_files, fread)
filtered_meta <- do.call(rbind, filt_meta_list)
meta <- do.call(rbind, meta_list)
cutoffs <- list(altius_frac = 0.5,
tss_frac = 0.2,
peaks_frac = 0.2)
meta$pass_fail <- "pass"
for(i in seq_along(cutoffs)) {
cut_name <- names(cutoffs)[i]
cut_val <- cutoffs[[i]]
cut_logic <- meta[[cut_name]] <= cut_val
meta$pass_fail[cut_logic] <- "fail"
}
meta$filtered <- meta$barcodes %in% filtered_meta$barcodes
meta$mito_frac <- meta$n_mito / meta$n_fragments
meta$well_label <- paste0(meta$well_id, "\n", meta$pbmc_sample_id)
well_samples <- unique(meta[,c("well_id","pbmc_sample_id","well_label")])
stm("Filtering based on QC cutoffs")
meta <- meta %>%
dplyr::left_join(dplyr::select(filtered_meta, barcodes, DoubletScore, DoubletEnrichment, TSSEnrichment), by = "barcodes")
filtered_meta <- meta
for(i in seq_along(cutoffs)) {
cut_name <- names(cutoffs)[i]
cut_val <- cutoffs[[i]]
filtered_meta <- filtered_meta[filtered_meta[[cut_name]] > cut_val]
filtered_meta <- filtered_meta[filtered_meta$filtered,]
}
filtered_meta$well_label <- paste0(filtered_meta$well_id, "\n", filtered_meta$pbmc_sample_id)
well_filtered_meta_list <- split(filtered_meta, filtered_meta$well_id)
Set up global metadata for reporting
meta$barcode_category <- "fail_qc"
meta$barcode_category[!meta$filtered & meta$pass_fail == "pass"] <- "pass_doublet"
meta$barcode_category[meta$filtered & meta$pass_fail == "pass"] <- "pass_singlet"
qc_list <- list(report_type = "atac_batch_qc",
report_datetime = as.character(start_time_atac),
report_uuid = ids::uuid(use_time = TRUE),
package = "ATAComb",
package_version = sessionInfo()$otherPkgs$ATAComb$Version,
batch_id = sub("_.+","",sample_names[1]))
out_json <- paste0(out_prefix, "atac_batch_qc_metrics.json")
barcode_counts <- meta[,.(n_barcodes = nrow(.SD),
n_pass_qc = sum(.SD$pass_fail == "pass"),
n_fail_qc = sum(.SD$pass_fail == "fail"),
percent_fail = round(sum(.SD$pass_fail == "fail")/nrow(.SD)*100,2),
pass_singlets = sum(.SD$barcode_category == "pass_singlet"),
pass_doublets = sum(.SD$barcode_category == "pass_doublet"),
percent_doublets = round(sum(.SD$barcode_category == "pass_doublet")/sum(.SD$pass_fail == "pass")*100,2)),
.(well_id,pbmc_sample_id)]
qc_list$barcode_stats <- as.list(barcode_counts)
qc_table(barcode_counts)
qc_stacked_barplot(meta,
category_x = "well_label",
name_x = "Well ID",
category_y = "barcode_category",
category_name = "Barcode Category",
as_fraction = TRUE)
qc_aligned_barplot(meta,
category_x = "well_label",
name_x = "Well ID",
category_y = "barcode_category",
category_name = "Barcode Category")
qc_violin_plot(filtered_meta,
category_x = "well_label",
name_x = "Well ID",
column_y = "DoubletEnrichment",
name_y = "Doublet Enrichment",
log_y = FALSE,
fill = "dodgerblue")
qc_violin_plot(filtered_meta,
category_x = "well_label",
name_x = "Well ID",
column_y = "DoubletScore",
name_y = "Doublet Score",
log_y = FALSE,
fill = "dodgerblue")
qc_violin_plot(filtered_meta,
category_x = "well_label",
name_x = "Well ID",
column_y = "TSSEnrichment",
name_y = "TSS Enrichment",
log_y = FALSE,
fill = "dodgerblue")
Plot Settings
n_grid_columns <- min(length(well_filtered_meta_list),4)
n_grid_rows <- ceiling(length(well_filtered_meta_list)/4)
grid_width <- n_grid_columns * 3
grid_height <- n_grid_rows * 3
fragment_stats <- filtered_meta[,.(n_singlets = nrow(.SD[.SD$filtered]),
med_raw_frag = round(median(n_fragments),0),
med_raw_perc_mito = round(median(mito_frac)*100,4),
med_unique_frag = round(median(n_unique),0),
med_unique_fritss = round(median(tss_frac),4),
med_unique_frip = round(median(peaks_frac),4),
med_unique_encode = round(median(altius_frac),4)
),
.(well_id,pbmc_sample_id)]
qc_list$fragment_stats <- as.list(fragment_stats)
qc_table(fragment_stats)
category_reads_violins <- qc_violin_plot(meta,
category_x = "barcode_category",
name_x = "Barcode Type",
column_y = "n_unique",
name_y = "Unique Fragments",
fill = "dodgerblue")
well_reads_violins <- qc_violin_plot(filtered_meta,
category_x = "well_label",
name_x = "Well ID",
column_y = "n_unique",
name_y = "Unique Fragments (Singlets)",
fill = "dodgerblue")
reads_violin_list <- list(category_reads_violins,
well_reads_violins)
plot_grid(plotlist = reads_violin_list,
ncol = 2, rel_widths = c(1, 3),
nrow = 1, align = "h")
category_mito_violins <- qc_violin_plot(meta,
category_x = "barcode_category",
name_x = "Barcode Type",
column_y = "mito_frac",
name_y = "Fraction Mitochondrial",
fill = "darkgreen",
log_y = FALSE)
well_mito_violins <- qc_violin_plot(filtered_meta,
category_x = "well_label",
name_x = "Well ID",
column_y = "mito_frac",
name_y = "Fraction Mito. (Singlets)",
fill = "darkgreen",
log_y = FALSE)
mito_violin_list <- list(category_mito_violins,
well_mito_violins)
plot_grid(plotlist = mito_violin_list,
ncol = 2, rel_widths = c(1, 3),
nrow = 1, align = "h")
category_frip_violins <- qc_violin_plot(meta,
category_x = "barcode_category",
name_x = "Barcode Type",
column_y = "peaks_frac",
name_y = "FRIP",
fill = "orangered",
log_y = FALSE)
well_frip_violins <- qc_violin_plot(filtered_meta,
category_x = "well_label",
name_x = "Well ID",
column_y = "peaks_frac",
name_y = "FRIP (Singlets)",
fill = "orangered",
log_y = FALSE)
frip_violin_list <- list(category_frip_violins,
well_frip_violins)
plot_grid(plotlist = frip_violin_list,
ncol = 2, rel_widths = c(1, 3),
nrow = 1, align = "h")
qc_scatter_list <- map(well_filtered_meta_list,
function(well_meta) {
qc_scatter_plot(well_meta,
column_x = "n_unique",
name_x = "N Unique Fragments per Cell",
column_y = "peaks_frac",
name_y = "Frac Fragments in Peaks (peaks_frac)",
log_x = TRUE, log_y = FALSE, frac_y = TRUE,
show_targets = FALSE,
color = "orangered") +
geom_vline(aes(xintercept = 2.5e3), linetype = "dashed", size = 0.2) +
geom_hline(aes(yintercept = cutoffs$peaks_frac), linetype = "dashed", size = 0.2) +
ggtitle(well_meta$well_label[1])
})
plot_grid(plotlist = qc_scatter_list,
ncol = n_grid_columns,
nrow = n_grid_rows)
category_fritss_violins <- qc_violin_plot(meta,
category_x = "barcode_category",
name_x = "Barcode Type",
column_y = "tss_frac",
name_y = "FRITSS",
fill = "mediumorchid3",
log_y = FALSE)
well_fritss_violins <- qc_violin_plot(filtered_meta,
category_x = "well_label",
name_x = "Well ID",
column_y = "tss_frac",
name_y = "FRITSS (Singlets)",
fill = "mediumorchid3",
log_y = FALSE)
fritss_violin_list <- list(category_fritss_violins,
well_fritss_violins)
plot_grid(plotlist = fritss_violin_list,
ncol = 2, rel_widths = c(1, 3),
nrow = 1, align = "h")
qc_scatter_list <- map(well_filtered_meta_list,
function(well_meta) {
qc_scatter_plot(well_meta,
column_x = "n_unique",
name_x = "N Unique Fragments per Cell",
column_y = "tss_frac",
name_y = "Frac Fragments in TSS (tss_frac)",
log_x = TRUE, log_y = FALSE, frac_y = TRUE,
show_targets = FALSE,
color = "mediumorchid3") +
geom_vline(aes(xintercept = 2.5e3), linetype = "dashed", size = 0.2) +
geom_hline(aes(yintercept = cutoffs$tss_frac), linetype = "dashed", size = 0.2) +
ggtitle(well_meta$well_label[1])
})
plot_grid(plotlist = qc_scatter_list,
ncol = n_grid_columns,
nrow = n_grid_rows)
category_enc_violins <- qc_violin_plot(meta,
category_x = "barcode_category",
name_x = "Barcode Type",
column_y = "altius_frac",
name_y = "FRIENCODE",
fill = "darkred",
log_y = FALSE)
well_enc_violins <- qc_violin_plot(filtered_meta,
category_x = "well_label",
name_x = "Well ID",
column_y = "altius_frac",
name_y = "FRIENCODE (Singlets)",
fill = "darkred",
log_y = FALSE)
enc_violin_list <- list(category_enc_violins,
well_enc_violins)
plot_grid(plotlist = enc_violin_list,
ncol = 2, rel_widths = c(1, 3),
nrow = 1, align = "h")
qc_scatter_list <-map(well_filtered_meta_list,
function(well_meta) {
qc_scatter_plot(well_meta,
column_x = "n_unique",
name_x = "N Unique Fragments per Cell",
column_y = "altius_frac",
name_y = "Frac Fragments in Altius (altius_frac)",
log_x = TRUE, log_y = FALSE, frac_y = TRUE,
show_targets = FALSE,
color = "darkred") +
geom_vline(aes(xintercept = 2.5e3), linetype = "dashed", size = 0.2) +
geom_hline(aes(yintercept = cutoffs$altius_frac), linetype = "dashed", size = 0.2) +
ggtitle(well_meta$well_label[1])
})
plot_grid(plotlist = qc_scatter_list,
ncol = n_grid_columns,
nrow = n_grid_rows)
all_sat <- map_dfr(1:length(sat_list),
function(x) {
sat <- sat_list[[x]]
sat_pbmc_sample <- sub(paste0(batch_id,"_"),"",names(sat_list)[x])
sat$pbmc_sample_id <- sat_pbmc_sample
sat$well_id <- well_samples$well_id[well_samples$pbmc_sample_id == sat_pbmc_sample]
sat$well_label <- well_samples$well_label[well_samples$pbmc_sample_id == sat_pbmc_sample]
sat
})
all_sat <- as.data.table(all_sat)
sat_cutoffs <- all_sat[,.(M_raw = max(.SD$M_raw_reads[.SD$type == "actual"]),
M_umis = max(.SD$M_umis[.SD$type == "actual"]),
M_signal = max(.SD$M_signal_umis[.SD$type == "actual"]),
M_raw_2x_UMIs = .SD$M_raw_reads[which.min(abs(.SD$ratio - 2))[1]],
M_raw_4x_UMIs = .SD$M_raw_reads[which.min(abs(.SD$ratio - 4))[1]],
M_raw_2x_signal = .SD$M_raw_reads[which.min(abs(.SD$signal_ratio - 2))[1]],
M_raw_4x_signal = .SD$M_raw_reads[which.min(abs(.SD$signal_ratio - 4))[1]],
M_raw_8x_signal = .SD$M_raw_reads[which.min(abs(.SD$signal_ratio - 8))[1]]),
.(well_id,pbmc_sample_id)]
qc_list$saturation_stats <- as.list(sat_cutoffs)
qc_table(sat_cutoffs)
reference_projections <- fread(system.file("reference/saturation/reference_projections.csv",
package = "ATAComb"))
reference_projections$dataset <- paste0("ref_",reference_projections$dataset)
umi_saturation_plot <- ggplot() +
geom_line(data = reference_projections,
aes(x = n_raw_reads,
y = expected_umis,
group = dataset,
color = dataset)) +
geom_line(data = all_sat,
aes(x = n_raw_reads,
y = expected_umis,
group = well_label,
color = type),
size = 2) +
scale_color_brewer("Data Type",
type = "qual",
palette = 2) +
scale_x_continuous("N Raw Reads (millions)",
expand = c(0,0),
limits = c(0, 1.5e9),
breaks = seq(0, 1.5e9, by = 2.5e8),
labels = seq(0, 1500, by = 250)) +
scale_y_continuous("N Unique Fragments (millions)",
expand = c(0,0),
limits = c(0, 5e8),
breaks = seq(0, 5e8, by = 1e8),
labels = seq(0, 500, by = 100)) +
theme_bw() +
theme(panel.grid.minor = element_blank()) +
facet_wrap(facets = vars(well_label), ncol = 4) +
ggtitle("Saturation of All Unique Fragments")
umi_saturation_plot
Signal fragments are reads in Altius Index regions in barcodes passing QC
signal_saturation_plot <- ggplot() +
geom_line(data = reference_projections,
aes(x = n_raw_reads,
y = signal_umis,
group = dataset,
color = dataset)) +
geom_line(data = all_sat,
aes(x = n_raw_reads,
y = signal_umis,
group = well_label,
color = type),
size = 2) +
scale_color_brewer("Data Type",
type = "qual",
palette = 2) +
scale_x_continuous("N Raw Reads (millions)",
expand = c(0,0),
limits = c(0, 1.5e9),
breaks = seq(0, 1.5e9, by = 2.5e8),
labels = seq(0, 1500, by = 250)) +
scale_y_continuous("N Signal Fragments (millions)",
expand = c(0,0),
limits = c(0, 2e8),
breaks = seq(0, 2e8, by = 2e7),
labels = seq(0, 200, by = 20)) +
theme_bw() +
theme(panel.grid.minor = element_blank()) +
facet_wrap(facets = vars(well_label), ncol = 4) +
ggtitle("Saturation of Signal Fragments")
signal_saturation_plot
Plot Settings
n_grid_columns <- min(length(well_filtered_meta_list),2)
n_grid_rows <- ceiling(length(well_filtered_meta_list)/2)
grid_width <- n_grid_columns * 4
grid_height <- n_grid_rows * 2
frag_widths <- map_dfr(1:length(width_list),
function(x) {
fw <- width_list[[x]]
fw_pbmc_sample <- sub(paste0(batch_id,"_"),"",names(width_list)[x])
fw$pbmc_sample_id <- fw_pbmc_sample
fw$well_id <- well_samples$well_id[well_samples$pbmc_sample_id == fw_pbmc_sample]
fw$well_label <- well_samples$well_label[well_samples$pbmc_sample_id == fw_pbmc_sample]
fw
})
frag_widths <- frag_widths[width <= 750]
ggplot(data = frag_widths) +
geom_line(aes(x = width,
y = frac,
group = pass_fail,
color = pass_fail),
size = 1) +
xlim(0, 750) +
facet_wrap(vars(well_label), ncol = 2) +
theme_bw()
stm(paste0("Writing JSON to ",out_json))
qc_list_json <- jsonlite::toJSON(qc_list,
auto_unbox = TRUE,
pretty = TRUE)
writeLines(qc_list_json,
out_json)
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] plyr_1.8.6 viridis_0.5.1 viridisLite_0.3.0
## [4] Seurat_3.9.9.9010 future.apply_1.6.0 future_1.19.1
## [7] purrr_0.3.4 jsonlite_1.7.1 tidyr_1.1.2
## [10] plotly_4.9.3 gt_0.2.2 cowplot_1.1.0
## [13] dplyr_1.0.4 ggplot2_3.3.2 HTOparser_1.0.12
## [16] H5weaver_1.2.0 data.table_1.13.2 rhdf5_2.32.4
## [19] Matrix_1.2-18 batchreporter_1.1.0 optparse_1.6.6
##
## loaded via a namespace (and not attached):
## [1] Rtsne_0.15 colorspace_1.4-1 deldir_0.1-29
## [4] ellipsis_0.3.1 ggridges_0.5.2 spatstat.data_2.1-0
## [7] farver_2.0.3 leiden_0.3.3 listenv_0.8.0
## [10] DT_0.16 getopt_1.20.3 ggrepel_0.8.2
## [13] RSpectra_0.16-0 R.methodsS3_1.8.1 codetools_0.2-16
## [16] splines_4.0.3 knitr_1.30 polyclip_1.10-0
## [19] ica_1.0-2 cluster_2.1.0 R.oo_1.24.0
## [22] png_0.1-7 uwot_0.1.9.9000 shiny_1.5.0
## [25] sctransform_0.3.1 compiler_4.0.3 httr_1.4.2
## [28] backports_1.1.10 assertthat_0.2.1 fastmap_1.0.1
## [31] lazyeval_0.2.2 later_1.1.0.1 htmltools_0.5.1.1
## [34] tools_4.0.3 rsvd_1.0.3 igraph_1.2.6
## [37] gtable_0.3.0 glue_1.4.2 RANN_2.6.1
## [40] reshape2_1.4.4 Rcpp_1.0.5 spatstat_1.64-1
## [43] vctrs_0.3.6 nlme_3.1-149 crosstalk_1.1.0.1
## [46] lmtest_0.9-38 xfun_0.18 stringr_1.4.0
## [49] globals_0.13.1 mime_0.9 miniUI_0.1.1.1
## [52] lifecycle_0.2.0 irlba_2.3.3 goftest_1.2-2
## [55] ids_1.0.1 MASS_7.3-53 zoo_1.8-8
## [58] scales_1.1.1 promises_1.1.1 spatstat.utils_2.1-0
## [61] parallel_4.0.3 RColorBrewer_1.1-2 yaml_2.2.1
## [64] reticulate_1.16 pbapply_1.4-3 gridExtra_2.3
## [67] sass_0.3.1 rpart_4.1-15 stringi_1.5.3
## [70] checkmate_2.0.0 commonmark_1.7 rlang_0.4.10
## [73] pkgconfig_2.0.3 matrixStats_0.57.0 evaluate_0.14
## [76] lattice_0.20-41 ROCR_1.0-11 tensor_1.5
## [79] Rhdf5lib_1.10.1 labeling_0.4.2 patchwork_1.0.1
## [82] htmlwidgets_1.5.3 tidyselect_1.1.0 RcppAnnoy_0.0.16
## [85] magrittr_1.5 R6_2.4.1 generics_0.0.2
## [88] DBI_1.1.1 mgcv_1.8-33 pillar_1.4.6
## [91] withr_2.3.0 fitdistrplus_1.1-1 survival_3.2-7
## [94] abind_1.4-5 tibble_3.0.4 crayon_1.3.4
## [97] uuid_0.1-4 KernSmooth_2.23-17 rmarkdown_2.4
## [100] grid_4.0.3 digest_0.6.26 xtable_1.8-4
## [103] httpuv_1.5.4 R.utils_2.10.1 munsell_0.5.0
Total time elapsed
end_time <- Sys.time()
diff_time <- end_time - start_time_atac
time_message <- paste0("Elapsed Time: ",
round(diff_time, 3),
" ", units(diff_time))
print(time_message)
## [1] "Elapsed Time: 15.957 secs"
stm(time_message)
stm("10x ATAC Batch QC Report complete.")
scATAC report module 1.0.0, Lucas Graybuck
Input Directory:
in_dir
## [1] "~/Packages/batchreporter/inst/extdata/X070"
Input Directory Contents:
folders <- list.dirs(in_dir, recursive = FALSE)
file_list <- lapply(folders, function(x){
dir(x, recursive = TRUE)
})
names(file_list) <- basename(folders)
file_list
## $adt
## [1] "X070-EP1C1W1_adt_positive_tag_counts.csv"
## [2] "X070-EP1C1W1_Tag_Counts.csv"
## [3] "X070-EP1C1W2_adt_positive_tag_counts.csv"
## [4] "X070-EP1C1W2_Tag_Counts.csv"
## [5] "X070-EP1C1W3_adt_positive_tag_counts.csv"
## [6] "X070-EP1C1W3_Tag_Counts.csv"
##
## $hto
## [1] "X070-P1C1W1_hto_category_table.csv.gz"
## [2] "X070-P1C1W1_hto_count_matrix.csv.gz"
## [3] "X070-P1C1W1_hto_processing_metrics.json"
## [4] "X070-P1C1W2_hto_category_table.csv.gz"
## [5] "X070-P1C1W2_hto_count_matrix.csv.gz"
## [6] "X070-P1C1W2_hto_processing_metrics.json"
## [7] "X070-P1C1W3_hto_category_table.csv.gz"
## [8] "X070-P1C1W3_hto_count_matrix.csv.gz"
## [9] "X070-P1C1W3_hto_processing_metrics.json"
##
## $scatac
## [1] "X070_X070-MP1C1W1_all_metadata.csv.gz"
## [2] "X070_X070-MP1C1W1_filtered_metadata.csv.gz"
## [3] "X070_X070-MP1C1W1_fragment_width_summary.csv.gz"
## [4] "X070_X070-MP1C1W1_saturation_projection.csv.gz"
## [5] "X070_X070-MP1C1W2_all_metadata.csv.gz"
## [6] "X070_X070-MP1C1W2_filtered_metadata.csv.gz"
## [7] "X070_X070-MP1C1W2_fragment_width_summary.csv.gz"
## [8] "X070_X070-MP1C1W2_saturation_projection.csv.gz"
## [9] "X070_X070-MP1C1W3_all_metadata.csv.gz"
## [10] "X070_X070-MP1C1W3_filtered_metadata.csv.gz"
## [11] "X070_X070-MP1C1W3_fragment_width_summary.csv.gz"
## [12] "X070_X070-MP1C1W3_saturation_projection.csv.gz"
##
## $scrna
## [1] "X070-P1C1W1.h5" "X070-P1C1W2.h5" "X070-P1C1W3.h5"
Batch processing metadata:
in_batch_meta
## [1] NA
Config File:
in_config
## [1] "~/Packages/batchreporter/inst/extdata/default_rna_config_v1.csv"
Key File:
in_key
## [1] "~/Packages/batchreporter/inst/extdata/example_sample_key_X070_dummy-nonhashed.csv"
Output Directory:
out_dir
## [1] "~/Projects/X070/test_reports"
Session Info:
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] plyr_1.8.6 viridis_0.5.1 viridisLite_0.3.0
## [4] Seurat_3.9.9.9010 future.apply_1.6.0 future_1.19.1
## [7] purrr_0.3.4 jsonlite_1.7.1 tidyr_1.1.2
## [10] plotly_4.9.3 gt_0.2.2 cowplot_1.1.0
## [13] dplyr_1.0.4 ggplot2_3.3.2 HTOparser_1.0.12
## [16] H5weaver_1.2.0 data.table_1.13.2 rhdf5_2.32.4
## [19] Matrix_1.2-18 batchreporter_1.1.0 optparse_1.6.6
##
## loaded via a namespace (and not attached):
## [1] Rtsne_0.15 colorspace_1.4-1 deldir_0.1-29
## [4] ellipsis_0.3.1 ggridges_0.5.2 spatstat.data_2.1-0
## [7] farver_2.0.3 leiden_0.3.3 listenv_0.8.0
## [10] DT_0.16 getopt_1.20.3 ggrepel_0.8.2
## [13] RSpectra_0.16-0 R.methodsS3_1.8.1 codetools_0.2-16
## [16] splines_4.0.3 knitr_1.30 polyclip_1.10-0
## [19] ica_1.0-2 cluster_2.1.0 R.oo_1.24.0
## [22] png_0.1-7 uwot_0.1.9.9000 shiny_1.5.0
## [25] sctransform_0.3.1 compiler_4.0.3 httr_1.4.2
## [28] backports_1.1.10 assertthat_0.2.1 fastmap_1.0.1
## [31] lazyeval_0.2.2 later_1.1.0.1 htmltools_0.5.1.1
## [34] tools_4.0.3 rsvd_1.0.3 igraph_1.2.6
## [37] gtable_0.3.0 glue_1.4.2 RANN_2.6.1
## [40] reshape2_1.4.4 Rcpp_1.0.5 spatstat_1.64-1
## [43] vctrs_0.3.6 nlme_3.1-149 crosstalk_1.1.0.1
## [46] lmtest_0.9-38 xfun_0.18 stringr_1.4.0
## [49] globals_0.13.1 mime_0.9 miniUI_0.1.1.1
## [52] lifecycle_0.2.0 irlba_2.3.3 goftest_1.2-2
## [55] ids_1.0.1 MASS_7.3-53 zoo_1.8-8
## [58] scales_1.1.1 promises_1.1.1 spatstat.utils_2.1-0
## [61] parallel_4.0.3 RColorBrewer_1.1-2 yaml_2.2.1
## [64] reticulate_1.16 pbapply_1.4-3 gridExtra_2.3
## [67] sass_0.3.1 rpart_4.1-15 stringi_1.5.3
## [70] checkmate_2.0.0 commonmark_1.7 rlang_0.4.10
## [73] pkgconfig_2.0.3 matrixStats_0.57.0 evaluate_0.14
## [76] lattice_0.20-41 ROCR_1.0-11 tensor_1.5
## [79] Rhdf5lib_1.10.1 labeling_0.4.2 patchwork_1.0.1
## [82] htmlwidgets_1.5.3 tidyselect_1.1.0 RcppAnnoy_0.0.16
## [85] magrittr_1.5 R6_2.4.1 generics_0.0.2
## [88] DBI_1.1.1 mgcv_1.8-33 pillar_1.4.6
## [91] withr_2.3.0 fitdistrplus_1.1-1 survival_3.2-7
## [94] abind_1.4-5 tibble_3.0.4 crayon_1.3.4
## [97] uuid_0.1-4 KernSmooth_2.23-17 rmarkdown_2.4
## [100] grid_4.0.3 digest_0.6.26 xtable_1.8-4
## [103] httpuv_1.5.4 R.utils_2.10.1 munsell_0.5.0
Total time elapsed
end_time_all <- Sys.time()
diff_time_all <- end_time_all - start_time_all
time_message_all <- paste0("Elapsed Time: ",
round(diff_time_all, 3),
" ", units(diff_time_all))
print(time_message_all)
## [1] "Elapsed Time: 5.28 mins"
stm(time_message_all)
stm("Batch report process complete.")
NGS report v.1.0.0, Lauren Okada